######################
#Replication Materials
######################

#######################################
#Quasi-Poisson Models with Simulations
#######################################
rm(list = ls())
setwd()
# Packages
pacman::p_load(rtweet,
               ggplot2,
               purrrlyr,
               dplyr,
               readxl,
               writexl,
               broom,
               tidyverse,
               dotwhisker,
               lubridate,
               stargazer,
               ggpubr, 
               jtools,
               rpart,
               caret,
               reshape2,
               ipred,
               RColorBrewer,
               rpart.plot,
               MASS,
               randomForest,
               dplyr,
               car,
               vtable,
               effects,
               interactions,
               doParallel,
               foreach
               
)

set.seed(0611)
#Import data

df <- readRDS("df.RDS")

df$conspir.gen <- as.factor(df$conspir.gen)
df$conspir.tusk <- as.factor(df$conspir.tusk)
df$conspir.po <- as.factor(df$conspir.po)  
df$conspir.rus <- as.factor(df$conspir.rus)
df$conspir.collusion <- as.factor(df$conspir.collusion)
df$month <- as.factor(df$month)

df$official <- as.factor(df$official)


##########
#QP Models
##########
##########

##########
#Likes
##########
m.conspir.po.fav <- glm(favorite_count ~ conspir.po*official + log_follow + 
                          log_friends + verified + month, 
                        family = "quasipoisson",
                        data = df)
m.conspir.tusk.fav <- glm(favorite_count ~ conspir.tusk*official + log_follow + 
                            log_friends + verified + month, 
                          family = "quasipoisson",
                          data = df)

m.conspir.rus.fav <- glm(favorite_count ~ conspir.rus*official + log_follow + 
                           log_friends + verified + month, 
                         family = "quasipoisson",
                         data = df)

m.conspir.collusion.fav <- glm(favorite_count ~ conspir.collusion*official + log_follow + 
                                 log_friends + verified + month, 
                               family = "quasipoisson",
                               data = df)
#repeat for stargazer
m1 <- glm(favorite_count ~ conspir.po*official + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)
m2 <- glm(favorite_count ~ conspir.tusk*official + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)

m4 <- glm(favorite_count ~ conspir.collusion*official + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)
stargazer(m1, m2, m4, 
          header = FALSE,
          align = TRUE,
          type = "latex",
          #out = "Figures for Paper/app_fav_conspir_quasi.htm",
          label = "tab:off_app_fav_conspir_quasi",
          #column.sep.width = "-15pt",
          title = "Tweet Characteristics and Frequency of Likes- Quasi-Poisson Models",
          dep.var.caption = c("Number of Likes"),
          model.names = FALSE,
          omit = c("month"), #so it's prettier
          covariate.labels = c("PO Consp.",
                               "Tusk Consp.",
                               "Russia and PO Consp.",
                               "PiS Officials",
                               "Log(N Followers)",
                               "Log(N Friends)",
                               "Verified",
                               "PO Consp.*PiS Official",
                               "Tusk Consp.*PiS Official",
                               "PO and Russia Consp.*PiS Official",
                               "Constant"),
          notes.append = FALSE,
          notes = c("Standard Errors in parenthesis.", 
                    "*p $<$ 0.1; **p $<$ .05, ***p $<$ .01 (two-tailed tests)."))

##########
#Retweets
##########
m.conspir.po.rt <- glm(retweet_count ~ conspir.po*official + log_follow + 
                         log_friends + verified + month, 
                       family = "quasipoisson",
                       data = df)

m.conspir.tusk.rt <- glm(retweet_count ~ conspir.tusk*official + log_follow + 
                           log_friends + verified + month, 
                         family = "quasipoisson",
                         data = df)

m.conspir.rus.rt <- glm(retweet_count ~ conspir.rus*official + log_follow + 
                          log_friends + verified + month, 
                        family = "quasipoisson",
                        data = df)


m.conspir.collusion.rt <- glm(retweet_count ~ conspir.collusion*official + log_follow + 
                                log_friends + verified + month, 
                              family = "quasipoisson",
                              data = df)
#repeat for stargazer
m1 <- glm(retweet_count ~ conspir.po*official + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)

m2 <- glm(retweet_count ~ conspir.tusk*official + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)


m4 <- glm(retweet_count ~ conspir.collusion*official + log_follow + 
            log_friends + verified + month, 
          family = "quasipoisson",
          data = df)

stargazer(m1, m2, m4, 
          header = FALSE,
          align = TRUE,
          type = "latex",
          #out = "Figures for Paper/app_fav_conspir_quasi.htm",
          label = "tab:off_app_rt_conspir_quasi",
          #column.sep.width = "-15pt",
          title = "Tweet Characteristics and Frequency of Retweets- Quasi-Poisson Models",
          dep.var.caption = c("Number of Retweets"),
          model.names = FALSE,
          omit = c("month"), #so it's prettier
          covariate.labels = c("PO Consp.",
                               "Tusk Consp.",
                               "Russia and PO Consp.",
                               "PiS Official",
                               "Log(N Followers)",
                               "Log(N Friends)",
                               "Verified",
                               "PO Consp.*PiS Official",
                               "Tusk Consp.*PiS Official",
                               "Russia and PO Consp.*PiS Official",
                               "Constant"),
          notes.append = FALSE,
          notes = c("Standard Errors in parenthesis.", 
                    "*p $<$ 0.1; **p $<$ .05, ***p $<$ .01 (two-tailed tests)."))

###############
#Bootstrapping 
##############
#Create function for bootstrapping
boot_pi <- function(model, pdata, n, p) {
  odata <- model$data
  lp <- (1 - p) / 2
  up <- 1 - lp
  set.seed(0611)
  seeds <- round(runif(n, 1, 1000), 0)
  boot_y <- foreach(i = 1:n, .combine = rbind) %dopar% {
    set.seed(seeds[i])
    bdata <- odata[sample(seq(nrow(odata)), size = nrow(odata), replace = TRUE), ]
    bpred <- predict(update(model, data = bdata), type = "response", newdata = pdata)
    rpois(length(bpred), lambda = bpred)
  }
  boot_ci <- t(apply(boot_y, 2, quantile, c(lp, up)))
  return(data.frame(pred = predict(model, newdata = pdata, type = "response"), lower = boot_ci[, 1], upper = boot_ci[, 2]))
}
###
#PO
###
sim.df <- data.frame(conspir.po = sample(rep(c(0,1,0,1), length.out = 250)),
                     official = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))
sim.df$conspir.po <- as.factor(sim.df$conspir.po)
sim.df$official <- as.factor(sim.df$official)
sim.df$month <- as.factor(sim.df$month)

preds <-  predict.glm(m.conspir.po.fav, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.po.fav, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.po, official) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences

np <- sort(c(preds3$pred2[3] - preds3$pred2[1], 
             preds3$upper2[3] - preds3$upper2[1], 
             preds3$lower2[3] - preds3$lower2[1]))
p <- sort(c(preds3$pred2[4] - preds3$pred2[2], 
            preds3$upper2[4] - preds3$upper2[2], 
            preds3$lower2[4] - preds3$lower2[2]))

po.fav.diff <- as.data.frame(rbind(c(np,
                                     0), #0 for officials no
                                   c(p,
                                     1)) ) #1 for officials yes



#Tusk
sim.df <- data.frame(conspir.tusk = sample(rep(c(0,1,0,1), length.out = 250)),
                     official = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))
sim.df$conspir.tusk <- as.factor(sim.df$conspir.tusk)
sim.df$official <- as.factor(sim.df$official)
sim.df$month <- as.factor(sim.df$month)
preds <-  predict.glm(m.conspir.tusk.fav, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.tusk.fav, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.tusk, official) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences

np <- sort(c(preds3$pred2[3] - preds3$pred2[1], 
             preds3$upper2[3] - preds3$upper2[1], 
             preds3$lower2[3] - preds3$lower2[1]))
p <- sort(c(preds3$pred2[4] - preds3$pred2[2], 
            preds3$upper2[4] - preds3$upper2[2], 
            preds3$lower2[4] - preds3$lower2[2]))

tusk.fav.diff <- as.data.frame(rbind(c(np,
                                       0), #0 for officials no
                                     c(p,
                                       1)) ) #1 for officials yes


#Russia
sim.df <- data.frame(conspir.rus = sample(rep(c(0,1,0,1), length.out = 250)),
                     official = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))
sim.df$official <- as.factor(sim.df$official)
sim.df$conspir.rus <- as.factor(sim.df$conspir.rus)
sim.df$month <- as.factor(sim.df$month)
preds <-  predict.glm(m.conspir.rus.fav, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.rus.fav, sim.df, 1000, 0.95))

#Collusion
sim.df <- data.frame(conspir.collusion = sample(rep(c(0,1,0,1), length.out = 250)),
                     official = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))
sim.df$conspir.collusion <- as.factor(sim.df$conspir.collusion)
sim.df$official <- as.factor(sim.df$official)
sim.df$month <- as.factor(sim.df$month)
preds <-  predict.glm(m.conspir.collusion.fav, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.collusion.fav, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.collusion, official) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences
np <- sort(c(preds3$pred2[3] - preds3$pred2[1], 
             preds3$upper2[3] - preds3$upper2[1], 
             preds3$lower2[3] - preds3$lower2[1]))
p <- sort(c(preds3$pred2[4] - preds3$pred2[2], 
            preds3$upper2[4] - preds3$upper2[2], 
            preds3$lower2[4] - preds3$lower2[2]))

col.fav.diff <- as.data.frame(rbind(c(np,
                                      0), #0 for officials no
                                    c(p,
                                      1)) ) #1 for officials yes


#############
#Combine Favs
#############
pred_diffs <- as.data.frame(rbind(po.fav.diff, tusk.fav.diff, col.fav.diff))
colnames(pred_diffs) <- c("LCI", "estimate", "UCI", "PiS")
pred_diffs$model <- c("PO", "PO", "Tusk", "Tusk", "Collusion", "Collusion")
pred_diffs$PiS <- as.factor(pred_diffs$PiS)
### plot of differences 
pd <- position_dodge(width = 0.4)

fav.diff <- ggplot(pred_diffs, aes(x = model, y = estimate, shape = PiS)) + 
  geom_point(position = pd) + 
  coord_flip() +
  geom_errorbar(aes(ymin = LCI, ymax = UCI), colour="black", width=.1, position = pd) +
  ylim(-125,150) + ylab("Difference in Predicted Number of Likes") + xlab("Conspiracy") +
  geom_hline(aes(yintercept = 0), color = "blue", linetype = "dotted")

#ggsave("Figures for Paper/PiS_Diff_Pred_N_Favs_Simulated.png", fav.diff)

pred_diffs_favs <- pred_diffs
#########
#Retweets
#########
###
#PO
###
sim.df <- data.frame(conspir.po = sample(rep(c(0,1,0,1), length.out = 250)),
                     official = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))


sim.df$conspir.po <- as.factor(sim.df$conspir.po)
sim.df$official <- as.factor(sim.df$official)
sim.df$month <- as.factor(sim.df$month)
preds <-  predict.glm(m.conspir.po.rt, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.po.rt, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.po, official) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences
np <- sort(c(preds3$pred2[3] - preds3$pred2[1], 
             preds3$upper2[3] - preds3$upper2[1], 
             preds3$lower2[3] - preds3$lower2[1]))
p <- sort(c(preds3$pred2[4] - preds3$pred2[2], 
            preds3$upper2[4] - preds3$upper2[2], 
            preds3$lower2[4] - preds3$lower2[2]))

po.rt.diff <- as.data.frame(rbind(c(np,
                                    0), #0 for officials no
                                  c(p,
                                    1)) ) #1 for officials yes



#Tusk
sim.df <- data.frame(conspir.tusk = sample(rep(c(0,1,0,1), length.out = 250)),
                     official = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))
sim.df$conspir.tusk <- as.factor(sim.df$conspir.tusk)
sim.df$official <- as.factor(sim.df$official)
sim.df$month <- as.factor(sim.df$month)
preds <-  predict.glm(m.conspir.tusk.rt, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.tusk.rt, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.tusk, official) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences
np <- sort(c(preds3$pred2[3] - preds3$pred2[1], 
             preds3$upper2[3] - preds3$upper2[1], 
             preds3$lower2[3] - preds3$lower2[1]))
p <- sort(c(preds3$pred2[4] - preds3$pred2[2], 
            preds3$upper2[4] - preds3$upper2[2], 
            preds3$lower2[4] - preds3$lower2[2]))
tusk.rt.diff <- as.data.frame(rbind(c(np,
                                      0), #0 for officials no
                                    c(p,
                                      1)) ) #1 for officials yes


#Russia
#Note: In our original submission, we included analyses for the CT that Russia
#or Putin caused the plane to crash. We maintain the analysis below for full
#transparency and to fully replicate the simulation seeds for all subsequent models.
sim.df <- data.frame(conspir.rus = sample(rep(c(0,1,0,1), length.out = 250)),
                     official = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))
sim.df$official <- as.factor(sim.df$official)
sim.df$conspir.rus <- as.factor(sim.df$conspir.rus)
sim.df$month <- as.factor(sim.df$month)
preds <-  predict.glm(m.conspir.rus.rt, sim.df, type = "response", se.fit = F)
#preds2 <- cbind(sim.df, boot_pi(m.conspir.rus.rt, sim.df, 1000, 0.95))

#Collusion
sim.df <- data.frame(conspir.collusion = sample(rep(c(0,1,0,1), length.out = 250)),
                     official = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))
sim.df$conspir.collusion <- as.factor(sim.df$conspir.collusion)
sim.df$official <- as.factor(sim.df$official)
sim.df$month <- as.factor(sim.df$month)
preds2 <- cbind(sim.df, boot_pi(m.conspir.collusion.rt, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.collusion, official) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences
np <- sort(c(preds3$pred2[3] - preds3$pred2[1], 
             preds3$upper2[3] - preds3$upper2[1], 
             preds3$lower2[3] - preds3$lower2[1]))
p <- sort(c(preds3$pred2[4] - preds3$pred2[2], 
            preds3$upper2[4] - preds3$upper2[2], 
            preds3$lower2[4] - preds3$lower2[2]))
col.rt.diff <- as.data.frame(rbind(c(np,
                                     0), #0 for officials no
                                   c(p,
                                     1)) ) #1 for officials yes


#############
#Combine Rts
#############
pred_diffs <- as.data.frame(rbind(po.rt.diff, tusk.rt.diff, col.rt.diff))
colnames(pred_diffs) <- c("LCI", "estimate", "UCI", "PiS")
pred_diffs$model <- c("PO", "PO", "Tusk", "Tusk", "Collusion", "Collusion")
pred_diffs$PiS <- as.factor(pred_diffs$PiS)
### plot of differences 
pd <- position_dodge(width = 0.4)

rt.diff <- ggplot(pred_diffs, aes(x = model, y = estimate, shape = PiS)) + 
  geom_point(position = pd) + 
  coord_flip() +
  geom_errorbar(aes(ymin = LCI, ymax = UCI), colour="black", width=.1, position = pd) +
  ylab("Difference in Predicted Number of Retweets") + xlab("Conspiracy") +
  geom_hline(aes(yintercept = 0), color = "blue", linetype = "dotted")

#ggsave("Figures for Paper/PiS_Diff_Pred_N_Rts_Simulated.png", rt.diff)

pred_diff_rts <- pred_diffs


### Now for one image

pred_diff_rts$Measure <- c("Retweets")
pred_diffs_favs$Measure <- c("Likes")
pred_diffs_together <- rbind(pred_diff_rts, pred_diffs_favs)

ggplot(pred_diffs_together, aes(x = Measure, 
                                y = estimate, 
                                color = factor(PiS),
                                shape = factor(PiS))) + 
  geom_point(position = pd, size = 1.5) + 
  facet_wrap(~model, ncol = 1) +
  coord_flip() +
  geom_errorbar(aes(ymin = LCI, ymax = UCI), width=.15, position = pd) +
  ylim(-70,215) +
  labs(y = "Difference in the Predicted Level \n of Engagement When Content Included",
       x = "Conspiracy Theory",
       shape = "PiS Official?") +
  theme_classic(base_size=20) +
  theme(legend.position = "bottom") +
  scale_color_grey(start = .2, end = .5, name = "PiS Official?", labels = c("No", "Yes") )+
  scale_shape_discrete(name = "PiS Official?", labels = c("No", "Yes"))+
  geom_hline(aes(yintercept = 0), color = "blue", linetype = "dotted") -> together
together